REVEAL¶

Your Real Estate Visualization and Exploration tool for Analytical Learning¶

Used libraries :

In [1]:
import pandas as pd
import plotly.express as px
from plotly import graph_objects as go
import calendar
import math

For the visual of the notebook :

In [25]:
import plotly.io as pio
pio.renderers.default='notebook'
pio.templates.default='plotly_dark'

1 - Loading¶

Load the data file directly with the usefull columns.

In [3]:
paths = {
    "2019" : "valeursfoncieres-2019.txt",
    "2020" : "valeursfoncieres-2020.txt",
    "2021" : "valeursfoncieres-2021.txt",
    "2022" : "valeursfoncieres-2022.txt",
}

root_path = "assets/"

used_colomns = [ "Date mutation", "Valeur fonciere", "No voie", "Type de voie", "Voie", "Code postal", \
        "Type local", "Surface reelle bati", "Nombre pieces principales", "Surface terrain", "Commune", "Code departement" ]

data = pd.read_csv(root_path + paths["2022"], sep='|', usecols=used_colomns, dtype={ "Code departement" : str })

data.head()
Out[3]:
Date mutation Valeur fonciere No voie Type de voie Voie Code postal Commune Code departement Type local Surface reelle bati Nombre pieces principales Surface terrain
0 03/01/2022 55000,00 13.0 RUE DE LA LIBERTE 1000.0 BOURG-EN-BRESSE 01 Appartement 24.0 1.0 NaN
1 03/01/2022 143000,00 NaN NaN CHAMP COCHET 1480.0 SAVIGNEUX 01 NaN NaN NaN 84.0
2 03/01/2022 143000,00 NaN NaN CHAMP COCHET 1480.0 SAVIGNEUX 01 NaN NaN NaN 88.0
3 03/01/2022 143000,00 98.0 RTE DE LA DOMBES 1480.0 SAVIGNEUX 01 Appartement 140.0 3.0 NaN
4 04/01/2022 300,00 NaN NaN AUX PIERRES 1480.0 MESSIMY SUR SAONE 01 NaN NaN NaN 510.0

Load France population for deeper analysis.

In [4]:
data_population = pd.read_csv(root_path + "population-dep.csv", sep=';', usecols=['Code Département', 'Population'])
data_population.rename(columns={ "Code Département" : "Code departement" }, inplace=True)
data_population.sort_values(by="Code departement", inplace=True)
data_population.head()
Out[4]:
Code departement Population
27 01 649654
38 02 534286
51 03 338978
21 04 161664
8 05 141576

2 - Cleaning¶

Real estate value of 0€, surface of 0m^2, room count of 0, are non-sense.

In [5]:
data.dropna(subset=["Valeur fonciere", "Surface reelle bati", "Nombre pieces principales"], inplace=True)

data["Date mutation"] = pd.to_datetime(data["Date mutation"], format='%d/%m/%Y')

data["Valeur fonciere"] = data["Valeur fonciere"].str.replace(',', '.').astype(float)
data["Nombre pieces principales"] = data["Nombre pieces principales"].astype(int)

data.drop(data[data["Surface reelle bati"] == 0].index, inplace=True)
data.drop(data[data["Nombre pieces principales"] == 0].index, inplace=True)

data.info()
<class 'pandas.core.frame.DataFrame'>
Index: 1388730 entries, 0 to 4617588
Data columns (total 12 columns):
 #   Column                     Non-Null Count    Dtype         
---  ------                     --------------    -----         
 0   Date mutation              1388730 non-null  datetime64[ns]
 1   Valeur fonciere            1388730 non-null  float64       
 2   No voie                    1379529 non-null  float64       
 3   Type de voie               1235716 non-null  object        
 4   Voie                       1388690 non-null  object        
 5   Code postal                1388695 non-null  float64       
 6   Commune                    1388730 non-null  object        
 7   Code departement           1388730 non-null  object        
 8   Type local                 1388730 non-null  object        
 9   Surface reelle bati        1388730 non-null  float64       
 10  Nombre pieces principales  1388730 non-null  int64         
 11  Surface terrain            866011 non-null   float64       
dtypes: datetime64[ns](1), float64(5), int64(1), object(5)
memory usage: 137.7+ MB

3 - Filtering¶

In [6]:
price_min = 0
price_max = 1000000

data = data[data["Valeur fonciere"].between(left=price_min, right=price_max)]

4 - Interpreting and visualizing¶

Mean square meter price

In [7]:
data.insert(1, "Prix/m2", [ p / s for p, s in zip(data["Valeur fonciere"], data["Surface reelle bati"]) ])
data.head()
Out[7]:
Date mutation Prix/m2 Valeur fonciere No voie Type de voie Voie Code postal Commune Code departement Type local Surface reelle bati Nombre pieces principales Surface terrain
0 2022-01-03 2291.666667 55000.0 13.0 RUE DE LA LIBERTE 1000.0 BOURG-EN-BRESSE 01 Appartement 24.0 1 NaN
3 2022-01-03 1021.428571 143000.0 98.0 RTE DE LA DOMBES 1480.0 SAVIGNEUX 01 Appartement 140.0 3 NaN
5 2022-01-06 2361.111111 255000.0 282.0 RTE DE POISATON 1560.0 MANTENAY-MONTLIN 01 Maison 108.0 5 649.0
8 2022-01-03 4166.666667 525000.0 217.0 PL DE LA CROIX BLANCHE 1390.0 SAINT-ANDRE-DE-CORCY 01 Appartement 126.0 4 628.0
12 2022-01-05 547.008547 64000.0 12.0 BD DE BROU 1000.0 BOURG-EN-BRESSE 01 Appartement 117.0 2 NaN
In [8]:
s = data.groupby(data["Date mutation"].dt.month)["Prix/m2"].mean()

fig = px.line(x=list(calendar.month_name)[1:], y=list(s.values), labels={'x':'', 'y':''})
fig.show()

Sold surface

In [9]:
s = data.groupby(data["Date mutation"].dt.month)["Surface reelle bati"].mean()
meanprice = list(s.values)

fig = px.line(x=list(calendar.month_name)[1:], y=meanprice, labels={'x':'', 'y':''})
fig.show()

Mean real estate value per department

In [10]:
s = data.groupby("Code departement")["Valeur fonciere"].mean()
dep, meanprice = list(s.index), list(s.values)

geojson="https://france-geojson.gregoiredavid.fr/repo/departements.geojson"

fig = go.Figure(go.Choroplethmapbox(geojson=geojson, featureidkey = "properties.code", \
        locations=dep, z=meanprice, zauto = True, colorscale = 'viridis', showscale = True))
fig.update_layout(mapbox_style="carto-positron", mapbox_zoom=4, mapbox_center = {"lat": 47, "lon": 2.3522}, \
        margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

Mean square meter price per department

In [11]:
s = data.groupby("Code departement")["Prix/m2"].mean()
dep, meanpricem2 = list(s.index), list(s.values)

fig = go.Figure(go.Choroplethmapbox(geojson=geojson, featureidkey = "properties.code", \
        locations=dep, z=meanpricem2, zauto = True, colorscale = 'viridis', showscale = True))
fig.update_layout(mapbox_style="carto-positron", mapbox_zoom=4, mapbox_center = {"lat": 47, "lon": 2.3522}, \
        margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

Sale value by typologies (limited to 6 rooms)

In [12]:
s = data.groupby([data["Nombre pieces principales"], data["Date mutation"].dt.month])["Nombre pieces principales"].count()

df = pd.DataFrame({str(i) + (" pièce" if i == 1 else " pièces") : s[i].tolist() for i in range(1, 7)})
df.insert(0, "Date mutation", list(calendar.month_name)[1:])

fig = px.bar(df, x="Date mutation", y=[str(i) + (" pièce" if i == 1 else " pièces") for i in range(1, 7)], labels={'value':'', 'Date mutation':''})
fig.show()

Mean square meter price by typologies (limited to 6 rooms)

In [13]:
s = data.groupby("Nombre pieces principales")["Prix/m2"].mean()
s = s.head(6)

fig = px.bar(y=[str(x) + (" pièce" if x == 1 else " pièces") for x in s.index], x=s.values, orientation='h', labels={'y':'', 'x':'Prix/m2'})
fig.show()

Mean square meter price by building types

In [14]:
s = data.groupby("Type local")["Prix/m2"].mean()

fig = px.bar(y=s.index, x=s.values, orientation='h', labels={'y':'', 'x':'Prix/m2'})
fig.show()

Share of typologies

In [15]:
s = data.groupby("Nombre pieces principales")["Nombre pieces principales"].count()
s = s.head(20)

fig = px.pie(names=[str(x) + (" pièce" if x == 1 else " pièces") for x in s.index], values=list(s.values))
fig.show()

Share of buildings types

In [16]:
s = data.groupby("Type local")["Type local"].count()

fig = px.pie(names=s.index, values=s.values)
fig.show()

Real estate value by department population

In [17]:
dataj = data.merge(data_population, on="Code departement", how="left")

s = dataj.groupby("Population")["Valeur fonciere"].mean()

fig = px.line(x=s.index, y=s.values, labels={'x':'Population', 'y':'Real estate value'})
fig.show()

Sale count by department population

In [18]:
s = dataj.groupby("Population")["Population"].count()

fig = px.line(x=s.index, y=s.values, labels={'x':'Population', 'y':'Sales'})
fig.show()

Mean sold surface by department population

In [19]:
s = dataj.groupby("Population")["Surface reelle bati"].mean()

fig = px.line(x=s.index, y=s.values, labels={'x':'Population', 'y':'Sales'})
fig.show()

Square meter price by department population

In [20]:
s = dataj.groupby("Population")["Prix/m2"].mean()

fig = px.line(x=s.index, y=s.values, labels={'x':'Population', 'y':'Sales'})
fig.show()
In [24]:
s = dataj.groupby(["Type local", "Population"])["Type local"].count()

fig = go.Figure()
fig.add_trace(go.Scatter(x=s["Maison"].index, y=s["Maison"].values, mode='lines', name='Maison'))
fig.add_trace(go.Scatter(x=s["Maison"].index, y=s["Appartement"].values, mode='lines', name='Appartement'))
fig.show()

'''
fig = px.scatter(x="sepal_width", y="sepal_length", color="species", size='petal_length', hover_data=['petal_width'])
fig.show()
'''
Out[24]:
'\nfig = px.scatter(x="sepal_width", y="sepal_length", color="species", size=\'petal_length\', hover_data=[\'petal_width\'])\nfig.show()\n'